In [ ]:
%run "../Functions/8. RM-GF correlations.ipynb"
In [ ]:
# Work on a copy of the source frame; `allData` is the frame read by the cells below.
allData = allDataPlaytestPhase1PretestPosttestUniqueProfilesVolunteers.copy()
In [ ]:
#def getScoresOnQuestionsFromAllData(allData, Qs):
In [ ]:
# Empty (all-NaN) table: one row per posttest scientific question, one column per
# integer 0..14. A later cell fills it, using 'maxChapter' values as column labels.
correctPerMaxChapter = pd.DataFrame(index = posttestScientificQuestions, columns = range(15))
In [ ]:
# Column labels of the entries whose 'maxChapter' row equals 10 (shown as the cell output).
allData.loc[:, allData.loc['maxChapter', :] == 10].columns
In [ ]:
# When the highest chapter reached is N, what is the rate of good answers for question Q?
# Fills two tables:
#   - maxCheckpointsDF:     number of entries per 'maxChapter' value,
#   - correctPerMaxChapter: truncated integer percentage of the summed scores
#                           per (question, 'maxChapter' value).
maxCheckpointsDF = pd.DataFrame(index=['maxCh'], columns=range(15))
for chapter in allData.loc['maxChapter', :].unique():
    # The column mask only depends on the chapter, so build it once per chapter
    # rather than once per question.
    reachedChapter = allData.loc['maxChapter', :] == chapter
    eltsCount = int(reachedChapter.sum())
    maxCheckpointsDF.loc['maxCh', chapter] = eltsCount
    for q in posttestScientificQuestions:
        interestingElts = allData.loc[q, reachedChapter]
        scoreSum = interestingElts.sum()
        correctPerMaxChapter.loc[q, chapter] = int(scoreSum * 100 / eltsCount)

# -1 marks (question, chapter) cells that were never filled. The explicit cast
# guarantees integer values for fmt='d' instead of relying on fillna to
# downcast the object-dtype frame.
correctPerMaxChapterNotNan = correctPerMaxChapter.fillna(-1).astype(int)
_fig1 = plt.figure(figsize=(20,20))
_ax1 = plt.subplot(111)
# Title now names the table actually plotted (it previously read "maxCheckpointsDF").
_ax1.set_title("correctPerMaxChapter")
sns.heatmap(
    correctPerMaxChapterNotNan,
    ax=_ax1,
    cmap=plt.cm.jet,
    square=True,
    annot=True,
    fmt='d',
)

# Chapters with no entry are shown as a count of 0.
maxCheckpointsDFNotNan = maxCheckpointsDF.fillna(0).astype(int)
_fig2 = plt.figure(figsize=(14,2))
_ax2 = plt.subplot(111)
_ax2.set_title("maxCheckpointsDF")
sns.heatmap(
    maxCheckpointsDFNotNan,
    ax=_ax2,
    cmap=plt.cm.jet,
    square=True,
    annot=True,
    fmt='d',
)
In [ ]:
# Correlation coefficient between the score on each posttest scientific
# question and the 'maxChapter' row, one coefficient per question.
# dtype=float keeps the result numeric so that fmt='.2f' can format it.
corrChapterScQDF = pd.DataFrame(index=posttestScientificQuestions, columns=['corr'], dtype=float)
# Loop-invariant: the 'maxChapter' values are the same for every question.
_maxChapterValues = allData.loc['maxChapter', :].values
for q in posttestScientificQuestions:
    # Off-diagonal term of the 2x2 correlation matrix.
    corrChapterScQDF.loc[q, 'corr'] = np.corrcoef(allData.loc[q, :].values, _maxChapterValues)[1, 0]

# -2 lies outside the valid [-1, 1] range, so undefined (NaN) correlations
# stand out in the annotations.
corrChapterScQDFNotNan = corrChapterScQDF.fillna(-2)
_fig1 = plt.figure(figsize=(14,10))
_ax1 = plt.subplot(111)
_ax1.set_title("corrChapterScQDFNotNan")
sns.heatmap(
    corrChapterScQDFNotNan,
    ax=_ax1,
    cmap=plt.cm.jet,
    square=True,
    annot=True,
    fmt='.2f',
    vmin=-1,
    vmax=1,
)
In [ ]:
In [ ]:
from sklearn.cluster import KMeans
# Import from the public package: the private `sklearn.neighbors.kde` module
# is no longer available in recent scikit-learn releases.
from sklearn.neighbors import KernelDensity
In [ ]:
# Toy data set: seven single-feature samples, one per row.
X = np.array([0.9, 1, 1.1, 4, 4.1, 4.2, 5]).reshape(-1, 1)
kmeans = KMeans(n_clusters=2, random_state=0)
kmeans.fit(X)
# Cell output: inertia of the two-cluster fit.
kmeans.inertia_
In [ ]:
# Cluster index assigned to each row of X by the fit above.
kmeans.labels_
In [ ]:
# Coordinates of the fitted cluster centres.
kmeans.cluster_centers_
In [ ]:
# Closest-cluster index for two new single-feature samples (3 and 4).
kmeans.predict([[3], [4]])
In [ ]:
# NOTE(review): not referenced by any visible cell; presumably meant as a
# cut-off for `kmeans.inertia_` -- confirm before relying on it.
inertiaThreshold = 1
In [ ]:
# Prefix every scientific question with answerTemporalities[1] to form the row
# label `posttestQuestion`, then look that row up in the data frame.
# NOTE(review): indentation was lost in this export; the row lookup is assumed
# to belong to the loop body -- confirm against the original notebook.
for question in scientificQuestions:
    posttestQuestion = " ".join((answerTemporalities[1], question))
    #deltaQuestion = delta + " " + question
    allDataPlaytestPhase1PretestPosttestUniqueProfilesVolunteers.loc[posttestQuestion, :]
In [ ]:
# Cluster the values of the `posttestQuestion` row into `clusterCount` groups,
# each value being one single-feature sample.
clusterCount = 3
_posttestRow = allDataPlaytestPhase1PretestPosttestUniqueProfilesVolunteers.loc[posttestQuestion, :]
X = [[answer] for answer in _posttestRow.values]
kmeans = KMeans(n_clusters=clusterCount, random_state=0)
kmeans.fit(X)
# Report a fit whose number of distinct labels differs from the requested one.
_distinctLabels = np.unique(kmeans.labels_)
if _distinctLabels.size != clusterCount:
    print("incorrect number of clusters")
# Cell output: inertia of the fit.
kmeans.inertia_
In [ ]:
In [ ]:
# Toy data set: six single-feature samples, one per row.
X = np.array([-1, -2, -3, 1, 2, 3])[:, np.newaxis]
kde = KernelDensity(kernel='gaussian', bandwidth=0.2)
kde.fit(X)
# Cell output: score of each training sample under the fitted density.
kde.score_samples(X)
In [ ]:
# Same toy data kept as a flat vector; it is reshaped into a single column
# before being handed to the estimator.
X = np.array([-1, -2, -3, 1, 2, 3])
_column = X.reshape(-1, 1)
kde = KernelDensity(kernel='gaussian', bandwidth=0.2)
kde.fit(_column)
kde.score_samples(_column)
# Cell output: the column-shaped view of X.
_column